Kapitel 6.2: Zentralität – Ergebnisse¶
Das Notebook ergänzt Kapitel 6.2 'Zentralität'.
Import¶
In [1]:
import pandas as pd
import plotly.express as px
from tqdm.notebook import tqdm
from resources_statistics import *
from resources_geschichtslyrik import *
import random
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from scipy.spatial import distance
In [2]:
pd.set_option('display.max_colwidth', None)
In [3]:
meta = pd.read_json(r"../resources/meta.json")
In [4]:
meta_mode_strikt = pd.read_csv("../resources/more/vectors/mode_strikt.csv", index_col = [0])
meta_mode_flexibel = pd.read_csv("../resources/more/vectors/mode_flexibel.csv", index_col = [0])
meta_mode_strikt1850 = pd.read_csv("../resources/more/vectors/mode_strikt1850.csv", index_col = [0])
In [5]:
features_used_df = pd.read_csv("../resources/more/vectors/vectordist_features.csv", index_col = [0])
meta_all_features = pd.read_csv("../resources/more/vectors/vectordist.csv", index_col = [0])
features_used = features_used_df['feature'].tolist()
In [6]:
dm_manhattan = pd.read_csv("../resources/more/vectors/vectordist_dm_manhattan.csv", index_col = [0])
dm_euclidean = pd.read_csv("../resources/more/vectors/vectordist_dm_euclidean.csv", index_col = [0])
dm_cosine = pd.read_csv("../resources/more/vectors/vectordist_dm_cosine.csv", index_col = [0])
dm_alldistances = pd.read_csv("../resources/more/vectors/vectordist_dm_alldistances.csv", index_col = [0])
In [7]:
dm_manhattan_unweighted = pd.read_csv("../resources/more/vectors/vectordist_dm_manhattan_unweighted.csv", index_col = [0])
dm_euclidean_unweighted = pd.read_csv("../resources/more/vectors/vectordist_dm_euclidean_unweighted.csv", index_col = [0])
dm_cosine_unweighted = pd.read_csv("../resources/more/vectors/vectordist_dm_cosine_unweighted.csv", index_col = [0])
dm_alldistances_unweighted = pd.read_csv("../resources/more/vectors/vectordist_dm_alldistances_unweighted.csv", index_col = [0])
In [8]:
meta_dists = pd.read_csv("../resources/more/vectors/vectordist_dists.csv", index_col = [0])
Korpora¶
In [9]:
meta['count'] = meta.query("corpus=='anth'").groupby('author_title')['author_title'].transform('count')
In [10]:
meta_anth = (
meta
.query("corpus=='anth'")
.query("1850 <= year <= 1918")
.query("geschichtslyrik == 1")
.drop_duplicates(subset='author_title')
.reset_index(drop = True)
)
In [11]:
modcanon_authors = ['Hofmannsthal, Hugo von', 'Rilke, Rainer Maria', 'George, Stefan', 'Heym, Georg']
meta_modcanon = (
meta
.query("author in @modcanon_authors")
.query("1850 <= year <= 1918")
.query("geschichtslyrik == 1")
.drop_duplicates(subset='author_title')
.reset_index(drop = True)
)
In [12]:
muench_authors = ['Münchhausen, Börries von', 'Miegel, Agnes', 'Strauß und Torney, Lulu von']
meta_muench = (
meta
.query("author in @muench_authors")
.query("1850 <= year <= 1918")
.query("geschichtslyrik == 1")
.drop_duplicates(subset='author_title')
.reset_index(drop = True)
)
In [13]:
meta_all = pd.concat([meta_anth, meta_modcanon, meta_muench])
meta_all = meta_all.drop_duplicates(subset = 'id')
meta_all = meta_all.reset_index(drop = True)
meta_all['korpus_anth'] = [True if x in list(meta_anth['author_title']) else False for x in meta_all['author_title']]
meta_all['korpus_modcanon'] = [True if x in modcanon_authors else False for x in meta_all['author']]
meta_all['korpus_muench'] = [True if x in muench_authors else False for x in meta_all['author']]
meta_all.shape[0]
Out[13]:
2063
In [14]:
print(meta_all.shape[0])
print(meta_mode_strikt.shape[0])
print(meta_mode_flexibel.shape[0])
print(meta_mode_strikt1850.shape[0])
print(meta_all_features.shape[0])
print(meta_dists.shape[0])
2063 2063 2063 2063 2063 2063
In [15]:
meta_all = meta_all.merge(meta_mode_strikt, on = 'id')
meta_all = meta_all.merge(meta_mode_flexibel, on = 'id')
meta_all = meta_all.merge(meta_mode_strikt1850, on = 'id')
meta_all = meta_all.merge(meta_all_features, on = 'id')
meta_all = meta_all.merge(meta_dists, on = 'id')
meta_all.shape[0]
Out[15]:
2063
Berechnung Netzwerk¶
In [16]:
this_dm = dm_manhattan
In [17]:
import networkx as nx
In [18]:
def create_edge_table (distance_matrix, filter_std = 1.5):
results = distance_matrix.stack().reset_index()
results.columns = ['text1', 'text2', 'distance']
results['similarity'] = results['distance'] - results['distance'].max()/2
results['similarity'] = [x * -1 for x in results['similarity']]
results['similarity'] = results['similarity'] + results['distance'].max()/2
results = results.loc[
results['text1'] != results['text2']
]
if filter_std:
results = results.loc[
results['similarity'] > results['similarity'].mean() + filter_std * results['similarity'].std()
]
return results
In [19]:
this_meta = meta_all.query("korpus_anth")
this_ids = this_meta['id']
this_index = this_meta.index
In [20]:
edge_table = create_edge_table(this_dm.loc[this_ids, this_ids], filter_std = False)
G = nx.from_pandas_edgelist(edge_table, 'text1', 'text2', ['distance', 'similarity'])
In [21]:
eigenvector_centrality_dic = nx.eigenvector_centrality(G, weight = 'similarity')
meta_all.loc[this_index, 'eigenvector_centrality'] = list(eigenvector_centrality_dic.values())
Zusammenhänge¶
In [22]:
meta_plot = meta_all.query("korpus_anth")
meta_plot = meta_plot.rename(columns={
'mode_score_strikt' : 'Modus (strikt)',
'dist_mean_euclidean' : '<br>Mittelwert Distanzen (euclidean, gewichtet)'
})
fig = px.box(
meta_plot,
x = 'Modus (strikt)',
y = '<br>Mittelwert Distanzen (euclidean, gewichtet)',
points = 'all',
hover_data = ['id', 'author', 'title',]
)
fig.update_layout(
width = 1000, height = 600,
xaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
yaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
showlegend = False
)
fig.write_image(f"plots/6.2 Zusammenhang zweier Verfahrensvarianten zur Messung von Zentralität.pdf")
fig.show()
In [23]:
meta_all[[
'mode_score_strikt', 'mode_score_flexibel',
'dist_centroid_manhattan_unweighted', 'dist_centroid_euclidean_unweighted',
'dist_centroid_cosine_unweighted', 'dist_centroid_alldistances_unweighted',
'dist_centroid_manhattan', 'dist_centroid_euclidean', 'dist_centroid_cosine', 'dist_centroid_alldistances',
'dist_mean_manhattan', 'dist_mean_euclidean', 'dist_mean_cosine', 'dist_mean_alldistances',
'dist_mean_manhattan_unweighted', 'dist_mean_euclidean_unweighted', 'dist_mean_cosine_unweighted',
'dist_mean_alldistances_unweighted',
'eigenvector_centrality'
]].corr()
Out[23]:
| mode_score_strikt | mode_score_flexibel | dist_centroid_manhattan_unweighted | dist_centroid_euclidean_unweighted | dist_centroid_cosine_unweighted | dist_centroid_alldistances_unweighted | dist_centroid_manhattan | dist_centroid_euclidean | dist_centroid_cosine | dist_centroid_alldistances | dist_mean_manhattan | dist_mean_euclidean | dist_mean_cosine | dist_mean_alldistances | dist_mean_manhattan_unweighted | dist_mean_euclidean_unweighted | dist_mean_cosine_unweighted | dist_mean_alldistances_unweighted | eigenvector_centrality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| mode_score_strikt | 1.000000 | 0.952823 | -0.900802 | -0.914699 | -0.888176 | -0.914241 | -0.908581 | -0.903217 | -0.882121 | -0.911031 | -0.908174 | -0.906328 | -0.887145 | -0.911418 | -0.904079 | -0.919082 | -0.891435 | -0.915807 | 0.914046 |
| mode_score_flexibel | 0.952823 | 1.000000 | -0.887655 | -0.897019 | -0.896656 | -0.906832 | -0.877581 | -0.871240 | -0.878015 | -0.886252 | -0.877557 | -0.873846 | -0.880373 | -0.887011 | -0.887584 | -0.901716 | -0.898782 | -0.907626 | 0.884781 |
| dist_centroid_manhattan_unweighted | -0.900802 | -0.887655 | 1.000000 | 0.979699 | 0.940538 | 0.986774 | 0.880244 | 0.869071 | 0.866409 | 0.883292 | 0.881619 | 0.870721 | 0.869441 | 0.884200 | 0.998595 | 0.980713 | 0.942623 | 0.986269 | -0.881673 |
| dist_centroid_euclidean_unweighted | -0.914699 | -0.897019 | 0.979699 | 1.000000 | 0.952941 | 0.991597 | 0.896486 | 0.901334 | 0.874381 | 0.904018 | 0.899069 | 0.901863 | 0.878798 | 0.903991 | 0.983311 | 0.999396 | 0.955470 | 0.990811 | -0.901725 |
| dist_centroid_cosine_unweighted | -0.888176 | -0.896656 | 0.940538 | 0.952941 | 1.000000 | 0.979029 | 0.860968 | 0.856909 | 0.918110 | 0.885118 | 0.862470 | 0.857055 | 0.916545 | 0.886616 | 0.942132 | 0.954520 | 0.999925 | 0.979573 | -0.864334 |
| dist_centroid_alldistances_unweighted | -0.914241 | -0.906832 | 0.986774 | 0.991597 | 0.979029 | 1.000000 | 0.891894 | 0.888581 | 0.899602 | 0.903857 | 0.893752 | 0.889345 | 0.901565 | 0.904659 | 0.988128 | 0.992260 | 0.980548 | 0.999754 | -0.895228 |
| dist_centroid_manhattan | -0.908581 | -0.877581 | 0.880244 | 0.896486 | 0.860968 | 0.891894 | 1.000000 | 0.989582 | 0.947129 | 0.994697 | 0.998880 | 0.991083 | 0.954510 | 0.994123 | 0.880928 | 0.901617 | 0.865158 | 0.892813 | -0.997903 |
| dist_centroid_euclidean | -0.903217 | -0.871240 | 0.869071 | 0.901334 | 0.856909 | 0.888581 | 0.989582 | 1.000000 | 0.938454 | 0.992968 | 0.990802 | 0.999380 | 0.946257 | 0.991381 | 0.871650 | 0.904848 | 0.861245 | 0.888947 | -0.991212 |
| dist_centroid_cosine | -0.882121 | -0.878015 | 0.866409 | 0.874381 | 0.918110 | 0.899602 | 0.947129 | 0.938454 | 1.000000 | 0.969403 | 0.948296 | 0.938923 | 0.999565 | 0.971236 | 0.866761 | 0.879664 | 0.919263 | 0.901389 | -0.946271 |
| dist_centroid_alldistances | -0.911031 | -0.886252 | 0.883292 | 0.904018 | 0.885118 | 0.903857 | 0.994697 | 0.992968 | 0.969403 | 1.000000 | 0.995098 | 0.993368 | 0.975010 | 0.999639 | 0.884659 | 0.908604 | 0.888628 | 0.904792 | -0.994301 |
| dist_mean_manhattan | -0.908174 | -0.877557 | 0.881619 | 0.899069 | 0.862470 | 0.893752 | 0.998880 | 0.990802 | 0.948296 | 0.995098 | 1.000000 | 0.992528 | 0.955772 | 0.995407 | 0.883703 | 0.904222 | 0.866736 | 0.895136 | -0.999300 |
| dist_mean_euclidean | -0.906328 | -0.873846 | 0.870721 | 0.901863 | 0.857055 | 0.889345 | 0.991083 | 0.999380 | 0.938923 | 0.993368 | 0.992528 | 1.000000 | 0.946930 | 0.992440 | 0.873482 | 0.906108 | 0.861505 | 0.890062 | -0.993384 |
| dist_mean_cosine | -0.887145 | -0.880373 | 0.869441 | 0.878798 | 0.916545 | 0.901565 | 0.954510 | 0.946257 | 0.999565 | 0.975010 | 0.955772 | 0.946930 | 1.000000 | 0.976854 | 0.870009 | 0.884124 | 0.918031 | 0.903375 | -0.954167 |
| dist_mean_alldistances | -0.911418 | -0.887011 | 0.884200 | 0.903991 | 0.886616 | 0.904659 | 0.994123 | 0.991381 | 0.971236 | 0.999639 | 0.995407 | 0.992440 | 0.976854 | 1.000000 | 0.886078 | 0.908950 | 0.890155 | 0.905964 | -0.994878 |
| dist_mean_manhattan_unweighted | -0.904079 | -0.887584 | 0.998595 | 0.983311 | 0.942132 | 0.988128 | 0.880928 | 0.871650 | 0.866761 | 0.884659 | 0.883703 | 0.873482 | 0.870009 | 0.886078 | 1.000000 | 0.984388 | 0.944320 | 0.988446 | -0.883459 |
| dist_mean_euclidean_unweighted | -0.919082 | -0.901716 | 0.980713 | 0.999396 | 0.954520 | 0.992260 | 0.901617 | 0.904848 | 0.879664 | 0.908604 | 0.904222 | 0.906108 | 0.884124 | 0.908950 | 0.984388 | 1.000000 | 0.957110 | 0.991975 | -0.907192 |
| dist_mean_cosine_unweighted | -0.891435 | -0.898782 | 0.942623 | 0.955470 | 0.999925 | 0.980548 | 0.865158 | 0.861245 | 0.919263 | 0.888628 | 0.866736 | 0.861505 | 0.918031 | 0.890155 | 0.944320 | 0.957110 | 1.000000 | 0.981126 | -0.868728 |
| dist_mean_alldistances_unweighted | -0.915807 | -0.907626 | 0.986269 | 0.990811 | 0.979573 | 0.999754 | 0.892813 | 0.888947 | 0.901389 | 0.904792 | 0.895136 | 0.890062 | 0.903375 | 0.905964 | 0.988446 | 0.991975 | 0.981126 | 1.000000 | -0.896486 |
| eigenvector_centrality | 0.914046 | 0.884781 | -0.881673 | -0.901725 | -0.864334 | -0.895228 | -0.997903 | -0.991212 | -0.946271 | -0.994301 | -0.999300 | -0.993384 | -0.954167 | -0.994878 | -0.883459 | -0.907192 | -0.868728 | -0.896486 | 1.000000 |
In [24]:
meta_plot = round(meta_all[[
'mode_score_strikt', 'mode_score_flexibel',
'dist_mean_manhattan', 'dist_mean_euclidean', 'dist_mean_cosine', 'dist_mean_alldistances',
'dist_mean_manhattan_unweighted', 'dist_mean_euclidean_unweighted', 'dist_mean_cosine_unweighted',
'dist_mean_alldistances_unweighted',
'dist_centroid_manhattan',
'eigenvector_centrality'
]].corr(), 3).abs()
category_dic = {
'mode_score_strikt' : 'Modus (strikt) ',
'mode_score_flexibel' : 'Modus (flexibel) ',
'dist_mean_manhattan' : 'Mittelwert Distanzen (manhattan, gewichtet) ',
'dist_mean_euclidean' : 'Mittelwert Distanzen (euclidean, gewichtet) ',
'dist_mean_cosine' : 'Mittelwert Distanzen (cosine, gewichtet) ',
'dist_mean_alldistances' : 'Mittelwert Distanzen (alle, gewichtet) ',
'dist_mean_manhattan_unweighted' : 'Mittelwert Distanzen (manhattan, ungewichtet) ',
'dist_mean_euclidean_unweighted' : 'Mittelwert Distanzen (euclidean, ungewichtet) ',
'dist_mean_cosine_unweighted' : 'Mittelwert Distanzen (cosine, ungewichtet) ',
'dist_mean_alldistances_unweighted' : 'Mittelwert Distanzen (alle, ungewichtet) ',
'dist_centroid_manhattan' : 'Distanz zum Zentroid (manhattan, gewichtet) ',
'eigenvector_centrality' : 'Eigenvektorzentralität (manhattan, gewichtet) ',
}
meta_plot = meta_plot.rename(columns=category_dic, index=category_dic)
fig = px.imshow(
meta_plot,
text_auto=True,
aspect = "auto",
zmin=0.8,
)
fig.update_layout(
width = 1600, height = 800,
xaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
yaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
font=dict(size=20),
# showlegend = False
)
fig.write_image(f"plots/6.2 Korrelation verschiedener Verfahrensvarianten zur Messung von Zentralität.pdf")
fig.show()
Grundlegende Ergebnisse¶
Zentrale Merkmale (Modus_strikt)¶
In [25]:
authortitle = [meta.query("id == @x")['author_title'].tolist()[0] for x in meta_mode_strikt['id']]
korpus_anth = [True if x in meta_anth['author_title'].tolist() else False for x in authortitle]
meta_mode_strikt['korpus_anth'] = korpus_anth
(meta_mode_strikt
.query("korpus_anth")
.drop(["id", "mode_score_strikt", "missing_from_mode_strikt", "korpus_anth"], axis = 1)
).mean().round(2)
Out[25]:
strikt_Geschichtslyrik 1.00 strikt_empirisch 1.00 strikt_nicht theoretisch 0.97 strikt_Ballade (exakt) 0.54 strikt_Sprechinstanz nicht markiert 0.56 strikt_Sprechinstanz Zeit unklar 0.56 strikt_Erzählen (exakt) 0.57 strikt_Präsens und Präiteritum 0.42 strikt_Konkretheit 0.82 strikt_keine Positionierung zum Wissen 0.91 strikt_vergangenheitsomdinant 0.82 strikt_2 Zeitebenen (exakt) 0.46 strikt_zeitlich fixierbar 0.64 strikt_Beginn 1870 0.05 strikt_Ende 1870 0.04 strikt_keine Anachronismen 0.94 strikt_kein Gegenwartsbezug 0.71 strikt_Europa (exakt) 0.95 strikt_Heiliges Römisches Reich (exakt) 0.41 strikt_Kein Kleinraum 0.39 strikt_Handlung (exakt) 0.58 strikt_Krieg (exakt) 0.28 strikt_positive Bewertung von Krieg (exakt) 0.15 strikt_bekanntes Individuum (exakt) 0.15 strikt_positive Bewertung von bekanntem Individuum (exakt) 0.10 strikt_kein Nationalismus 0.86 strikt_kein Heroismus 0.72 strikt_keine Religiosität 0.83 strikt_Personen-Marker (Titel + Text) 0.46 strikt_keine Zeit-/Geschichts-Marker 0.58 strikt_keine Ort-Marker 0.72 strikt_Objekt-/Institutionen-Marker (Text) 0.55 strikt_kein Bezug auf Überlieferung 0.77 strikt_keine Bewertung von Überlieferung 0.77 strikt_kein Bezug auf Geschichtsauffassungen 0.96 strikt_keine Bewertung von Geschichtsauffassungen 0.96 strikt_Ergänzung des Geschichtswissens 0.75 strikt_Reim 0.95 strikt_regelmäßiges Metrum 0.98 strikt_keine verfremdende Sprache 0.99 dtype: float64
Korpora¶
In [26]:
meta_plot = pd.concat([
meta_all.query("korpus_anth"),
meta_all.query("korpus_modcanon"),
meta_all.query("korpus_muench")
])
meta_plot['korpus'] = ['anth' if x['korpus_anth'] else 'modcanon' if x['korpus_modcanon'] else 'muench' for x in meta_plot.iloc]
px.box(
meta_plot,
y = 'mode_score_strikt',
color = 'korpus',
points = 'all',
hover_data = ['author', 'title']
)
Zusammenhang mit Zahl der Abdrucke¶
In [27]:
meta_all['count_min5'] = [1 if x >= 5 else 0 for x in meta_all['count']]
meta_all['count_min10'] = [1 if x >= 10 else 0 for x in meta_all['count']]
In [28]:
meta_all.query("corpus=='anth'")[[
'count', 'count_min5', 'count_min10',
'mode_score_strikt', 'dist_mean_alldistances'
]].corr()
Out[28]:
| count | count_min5 | count_min10 | mode_score_strikt | dist_mean_alldistances | |
|---|---|---|---|---|---|
| count | 1.000000 | 0.796814 | 0.778227 | 0.067258 | -0.059125 |
| count_min5 | 0.796814 | 1.000000 | 0.511927 | 0.067102 | -0.058313 |
| count_min10 | 0.778227 | 0.511927 | 1.000000 | 0.035281 | -0.039871 |
| mode_score_strikt | 0.067258 | 0.067102 | 0.035281 | 1.000000 | -0.912061 |
| dist_mean_alldistances | -0.059125 | -0.058313 | -0.039871 | -0.912061 | 1.000000 |
In [29]:
stats.pearsonr(
meta_all.query("corpus=='anth'")['mode_score_strikt'],
meta_all.query("corpus=='anth'")['count']
)
Out[29]:
PearsonRResult(statistic=0.06725808687682228, pvalue=0.003801235216825594)
In [30]:
stats.pearsonr(
meta_all.query("corpus=='anth'")['dist_mean_alldistances'],
meta_all.query("corpus=='anth'")['count']
)
Out[30]:
PearsonRResult(statistic=-0.0591246699114598, pvalue=0.010973088602442877)
In [31]:
meta_plot = meta_all.query("corpus=='anth'").copy()
meta_plot['count_min5'] = meta_plot['count_min5'].replace({0 : 'unter 5', 1: '5 oder mehr'})
px.box(
meta_plot.sort_values(by='count_min5', ascending=False),
x = 'count_min5',
y = 'mode_score_strikt',
points = 'all',
hover_data = ['author', 'title'],
labels = {'count_min5' : 'Vorkommen im Anthologiekorpus',
'mode_score_strikt' : 'Modus (strikt)'
}
)
In [32]:
stats.ttest_ind(
meta_all.query("corpus=='anth' and count_min5 == 1")['mode_score_strikt'],
meta_all.query("corpus=='anth' and count_min5 == 0")['mode_score_strikt']
)
Out[32]:
TtestResult(statistic=2.891123789799813, pvalue=0.003883510462262761, df=1848.0)
In [33]:
get_cohens_d(
meta_all.query("corpus=='anth' and count_min5 == 1")['mode_score_strikt'],
meta_all.query("corpus=='anth' and count_min5 == 0")['mode_score_strikt']
)
Out[33]:
0.26727065034081565
Texte¶
In [34]:
meta_all['words'] = [len(' '.join(x).split(" ")) if str(x) != 'None' else x for x in meta_all['text_bestocr']]
In [35]:
(meta_all.query("korpus_anth")[[
"author", "title", "year", "count", "mode_score_strikt", 'words', "missing_from_mode_strikt",
]]
.sort_values(by = "author")
.sort_values(by = "mode_score_strikt", ascending = False)
.query("mode_score_strikt >= 34")
)
Out[35]:
| author | title | year | count | mode_score_strikt | words | missing_from_mode_strikt | |
|---|---|---|---|---|---|---|---|
| 780 | Weinholz, Albert | Otto von Wittelsbachs Bergfahrt | 1858.0 | 1.0 | 35.0 | 648.0 | strikt_Beginn 1870 + strikt_Ende 1870 + strikt_bekanntes Individuum (exakt) + strikt_positive Bewertung von bekanntem Individuum (exakt) + strikt_kein Heroismus |
| 1094 | Meyer, Conrad Ferdinand | Die Schweizer des Herrn von Tremouille | 1875.0 | 2.0 | 35.0 | 338.0 | strikt_2 Zeitebenen (exakt) + strikt_Beginn 1870 + strikt_Ende 1870 + strikt_bekanntes Individuum (exakt) + strikt_positive Bewertung von bekanntem Individuum (exakt) |
| 129 | Priem, Johann Paul | Der Schneidergeneral. 1. Der Rekrut | 1858.0 | 1.0 | 34.0 | NaN | strikt_2 Zeitebenen (exakt) + strikt_Beginn 1870 + strikt_Ende 1870 + strikt_bekanntes Individuum (exakt) + strikt_positive Bewertung von bekanntem Individuum (exakt) + strikt_Personen-Marker (Titel + Text) |
| 1626 | Lingg, Hermann | Heinrich der Finkler | 1870.0 | 1.0 | 34.0 | 238.0 | strikt_Präsens und Präiteritum + strikt_Beginn 1870 + strikt_Ende 1870 + strikt_Heiliges Römisches Reich (exakt) + strikt_Krieg (exakt) + strikt_positive Bewertung von Krieg (exakt) |
| 292 | Brunold, Friedrich | König Christian I. von Dänemark und Henning Wulf | 1859.0 | 2.0 | 34.0 | 274.0 | strikt_2 Zeitebenen (exakt) + strikt_Beginn 1870 + strikt_Ende 1870 + strikt_positive Bewertung von Krieg (exakt) + strikt_bekanntes Individuum (exakt) + strikt_positive Bewertung von bekanntem Individuum (exakt) |
| 927 | Liliencron, Detlev von | Wibke Pogwisch | 1889.0 | 1.0 | 34.0 | 407.0 | strikt_Beginn 1870 + strikt_Ende 1870 + strikt_positive Bewertung von Krieg (exakt) + strikt_bekanntes Individuum (exakt) + strikt_positive Bewertung von bekanntem Individuum (exakt) + strikt_Personen-Marker (Titel + Text) |
| 833 | Helmers, Heinrich | Maria Theresia in Preßburg | 1887.0 | 1.0 | 34.0 | 290.0 | strikt_Erzählen (exakt) + strikt_Beginn 1870 + strikt_Ende 1870 + strikt_Kein Kleinraum + strikt_bekanntes Individuum (exakt) + strikt_positive Bewertung von bekanntem Individuum (exakt) |
| 1740 | Richter, Paul | Brusehawer | 1908.0 | 1.0 | 34.0 | 369.0 | strikt_Beginn 1870 + strikt_Ende 1870 + strikt_positive Bewertung von Krieg (exakt) + strikt_bekanntes Individuum (exakt) + strikt_positive Bewertung von bekanntem Individuum (exakt) + strikt_Personen-Marker (Titel + Text) |
In [36]:
meta_all.query("author.str.contains('Priem')")[[
'author', 'title', 'mode_score_strikt', 'dist_mean_euclidean'
]]
Out[36]:
| author | title | mode_score_strikt | dist_mean_euclidean | |
|---|---|---|---|---|
| 129 | Priem, Johann Paul | Der Schneidergeneral. 1. Der Rekrut | 34.0 | 2.021937 |
| 130 | Priem, Johann Paul | Der Schneidergeneral. 2. Der Sieg von Stralsund | 31.0 | 2.237389 |
In [37]:
(meta_all.query("korpus_anth")[[
"author", "title", "year", "count", "mode_score_strikt", 'words', # "missing_from_mode_strikt",
]]
.sort_values(by = "author")
.sort_values(by = "mode_score_strikt", ascending = True)
.query("mode_score_strikt <= 13")
)
Out[37]:
| author | title | year | count | mode_score_strikt | words | |
|---|---|---|---|---|---|---|
| 650 | Jahn, Franz | Erfüllung | 1870.0 | 3.0 | 11.0 | 132.0 |
| 620 | Meyer, Johannes | Der deutschen Jugend | 1881.0 | 1.0 | 12.0 | 180.0 |
| 1284 | Schack, Adolf Friedrich Graf von | Rast bei Milet | 1866.0 | 1.0 | 12.0 | 187.0 |
| 1309 | Lingg, Hermann | Pompeji | 1854.0 | 1.0 | 13.0 | 174.0 |
| 454 | Schrott, Johannes | König Ludwig I. | 1866.0 | 1.0 | 13.0 | 540.0 |
| 121 | Niedergesäß, Robert | Es treibet ohne Rast und Ruh | 1859.0 | 1.0 | 13.0 | NaN |
| 1682 | Fallersleben, Heinrich Hoffmann von | Weltgeschichte | 1871.0 | 1.0 | 13.0 | 69.0 |
| 1103 | Rosegger, Peter | Ein Blättchen Papier | 1875.0 | 1.0 | 13.0 | 153.0 |
Autor:innen¶
In [38]:
meta_plot = meta_all.query("korpus_anth").copy()
author_counts = meta_plot['author'].value_counts()
valid_authors = author_counts[author_counts >= 10].index
meta_plot = meta_plot.query("author.isin(@valid_authors)")
In [39]:
results = meta_plot.groupby('author').mean(numeric_only=True)
results.sort_values(by='mode_score_strikt', ascending=False)[['mode_score_strikt']]
Out[39]:
| mode_score_strikt | |
|---|---|
| author | |
| Böttger, Adolf | 30.400000 |
| Schrutz, Demetrius | 29.000000 |
| Müller von Königswinter, Wolfgang | 28.652174 |
| Krais, Julius | 28.500000 |
| Geißler, Max | 28.363636 |
| Stöber, Adolf | 27.800000 |
| Gruppe, Otto Friedrich | 27.690476 |
| Meyern, Gustav von | 27.583333 |
| Sturm, Julius | 27.550000 |
| Frey, Adolf | 27.500000 |
| Gaudy, Alice von | 27.333333 |
| Netz, Karl Ludwig | 26.900000 |
| Kirchner, Friedrich | 26.846154 |
| Meyer, Conrad Ferdinand | 26.720000 |
| Wickenburg, Albrecht von | 26.615385 |
| Weilen, Josef von | 26.538462 |
| Kaufmann, Alexander | 26.083333 |
| Liliencron, Detlev von | 26.058824 |
| Lissauer, Ernst | 25.666667 |
| Schults, Adolf | 25.363636 |
| Dahn, Felix | 25.301587 |
| Münchhausen, Börries von | 25.235294 |
| Hesekiel, George | 25.181818 |
| Fontane, Theodor | 25.153846 |
| Schack, Adolf Friedrich Graf von | 25.000000 |
| Lingg, Hermann | 24.987342 |
| Scheffel, Joseph Viktor von | 24.923077 |
| Greif, Martin | 24.892857 |
| Stieler, Karl | 24.823529 |
| Möser, Albert | 24.677966 |
| Vierordt, Heinrich | 24.612903 |
| Gerok, Karl | 23.833333 |
| Geibel, Emanuel | 23.722222 |
| Wildenbruch, Ernst von | 23.416667 |
Zeitverlauf und Korpusvergleich¶
In [40]:
queries_a = {
# "korpus_anth" : 'Anthologiekorpus',
"korpus_anth and decade == 1850" : '1850er',
"korpus_anth and decade == 1860" : '1860er',
"korpus_anth and decade == 1870" : '1870er',
"korpus_anth and decade == 1880" : '1880er',
"korpus_anth and decade == 1890" : '1890er',
"korpus_anth and decade == 1900" : '1900er',
"korpus_anth and decade == 1910" : '1910er',
"korpus_modcanon" : 'Kanonisierte Moderne',
"korpus_muench" : 'Münchhausen-Kreis'
}
In [41]:
queries_b = {
"korpus_anth and 1850 <= year <= 1854" : '1850–1854',
"korpus_anth and 1855 <= year <= 1859" : '1855–1859',
"korpus_anth and 1860 <= year <= 1864" : '1860–1864',
"korpus_anth and 1865 <= year <= 1869" : '1865–1869',
"korpus_anth and 1870 <= year <= 1874" : '1870–1874',
"korpus_anth and 1875 <= year <= 1879" : '1875–1879',
"korpus_anth and 1880 <= year <= 1884" : '1880–1884',
"korpus_anth and 1885 <= year <= 1889" : '1885–1889',
"korpus_anth and 1890 <= year <= 1894" : '1890–1894',
"korpus_anth and 1895 <= year <= 1899" : '1895–1899',
"korpus_anth and 1900 <= year <= 1904" : '1900–1904',
"korpus_anth and 1905 <= year <= 1909" : '1905–1909',
"korpus_anth and 1910 <= year <= 1914" : '1910–1914',
"korpus_anth and 1915 <= year <= 1918" : '1915–1918',
"korpus_modcanon" : 'Kanonisierte Moderne',
"korpus_muench" : 'Münchhausen-Kreis'
}
Anthologiekorpus 1850er, Modus¶
In [42]:
meta_plot = pd.DataFrame()
for corpus_query in queries_a:
this_meta = meta_all.query(corpus_query)
this_distances = this_meta['mode_score_strikt1850']
meta_add = pd.DataFrame()
meta_add[['author', 'title']] = this_meta[['author', 'title']]
meta_add['corpus'] = queries_a[corpus_query]
meta_add['dist'] = this_distances.tolist()
meta_plot = pd.concat([meta_plot, meta_add])
fig = px.box(
meta_plot,
x = 'corpus',
y = 'dist',
# points = 'all',
hover_data = ['author', 'title'],
labels = {'dist' : '<br>Modus_1850 (strikt)', 'corpus' : ''}
)
fig.update_layout(
width = 1000, height = 600,
xaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
yaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
showlegend = False
)
fig.write_image(f"plots/6.2 Moduswerte je Textgruppe, Bezugskorpus: Anthologietexte der 1850er.pdf")
fig.show()
In [43]:
meta_plot = pd.DataFrame()
for corpus_query in queries_b:
this_meta = meta_all.query(corpus_query)
this_distances = this_meta['mode_score_strikt1850']
meta_add = pd.DataFrame()
meta_add[['author', 'title']] = this_meta[['author', 'title']]
meta_add['corpus'] = queries_b[corpus_query]
meta_add['dist'] = this_distances.tolist()
meta_plot = pd.concat([meta_plot, meta_add])
fig = px.box(
meta_plot,
x = 'corpus',
y = 'dist',
# points = 'all',
hover_data = ['author', 'title'],
labels = {'dist' : '<br>Modus_1850 (strikt)', 'corpus' : ''}
)
fig.update_layout(
width = 1000, height = 800,
xaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
yaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
showlegend = False
)
fig.show()
Anthologiekorpus_1850er, Distanzen¶
In [44]:
this_dm = dm_alldistances
In [45]:
meta_1850 = meta_all.query("korpus_anth and 1850 <= year <= 1859")
In [46]:
meta_plot = pd.DataFrame()
for corpus_query in queries_a:
this_meta = meta_all.query(corpus_query)
this_corpus_dm = this_dm.loc[meta_1850.id, this_meta.id]
this_distances = this_corpus_dm.mean()
meta_add = pd.DataFrame()
meta_add[['author', 'title', 'year', 'count', 'mode_score_strikt1850']] = this_meta[[
'author', 'title', 'year', 'count', 'mode_score_strikt1850'
]]
meta_add['corpus'] = queries_a[corpus_query]
meta_add['dist'] = this_distances.tolist()
meta_plot = pd.concat([meta_plot, meta_add])
fig = px.box(
meta_plot,
x = 'corpus',
y = 'dist',
# points = 'all',
hover_data = ['author', 'title'],
labels = {'dist' : '<br>Mittelwert Distanzen_1850<br>(alle, gewichtet)', 'corpus' : ''}
)
fig.update_layout(
width = 1000, height = 600,
xaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
yaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
showlegend = False
)
fig.write_image(f"plots/6.2 Distanzwerte je Textgruppe, Bezugskorpus: Anthologietexte der 1850er.pdf")
fig.show()
In [47]:
meta_plot = pd.DataFrame()
for corpus_query in queries_b:
this_meta = meta_all.query(corpus_query)
this_corpus_dm = this_dm.loc[meta_1850.id, this_meta.id]
this_distances = this_corpus_dm.mean()
meta_add = pd.DataFrame()
meta_add[['author', 'title', 'year', 'count', 'mode_score_strikt1850']] = this_meta[[
'author', 'title', 'year', 'count', 'mode_score_strikt1850'
]]
meta_add['corpus'] = queries_b[corpus_query]
meta_add['dist'] = this_distances.tolist()
meta_plot = pd.concat([meta_plot, meta_add])
fig = px.box(
meta_plot,
x = 'corpus',
y = 'dist',
# points = 'all',
hover_data = ['author', 'title'],
labels = {'dist' : '<br>Mittelwert Distanzen_1850<br>(alle, gewichtet)', 'corpus' : ''}
)
fig.update_layout(
width = 1000, height = 800,
xaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
yaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
showlegend = False
)
fig.show()
Zusammenhang (im Anthologiekorpus)?¶
In [48]:
meta_corr = meta_all.query("corpus=='anth'").copy()
meta_corr_dm = dm_alldistances.loc[meta_1850.id, meta_corr.id]
meta_corr_distances = meta_corr_dm.mean()
meta_corr['dist'] = meta_corr_distances.tolist()
In [49]:
meta_corr[[
'year',
'mode_score_strikt1850', 'dist',
]].corr() # .round(2)
Out[49]:
| year | mode_score_strikt1850 | dist | |
|---|---|---|---|
| year | 1.000000 | -0.118972 | 0.113978 |
| mode_score_strikt1850 | -0.118972 | 1.000000 | -0.929779 |
| dist | 0.113978 | -0.929779 | 1.000000 |
In [50]:
# Zusammenhang mode_score_strikt1850 (r, p)
years = meta_corr['year']
centrality = meta_corr['mode_score_strikt1850']
r, p_value = stats.pearsonr(years, centrality)
print(f"{r} / {round(p_value, 10)}")
-0.11897204582514027 / 2.867e-07
In [51]:
# Zusammenhang dist_mean_alldistances (r, p)
years = meta_corr['year']
centrality = meta_corr['dist']
r, p_value = stats.pearsonr(years, centrality)
print(f"{r} / {round(p_value, 10)}")
0.11397785048239731 / 8.874e-07
In [52]:
fig = px.scatter(
meta_corr,
x = 'year',
y = 'dist',
hover_data = ['author', 'title'],
trendline = 'ols',
labels = {'dist': '<br>Mittelwert Distanzen_1850<br>(alle, gewichtet)',
'year' : ''
}
)
fig.show()
Dimensionsreduktion¶
Features für Plot-Einfärbung¶
In [53]:
for i, gattung in enumerate(meta_all['gattung']):
if gattung == 'Ballade':
meta_all.at[i, 'gattung_color'] = 'Ballade'
elif gattung == 'Lied':
meta_all.at[i, 'gattung_color'] = 'Lied'
elif gattung == 'Denkmal-/Ruinenpoesie':
meta_all.at[i, 'gattung_color'] = 'Denkmal-/Ruinenpoesie'
elif gattung == 'Sonett':
meta_all.at[i, 'gattung_color'] = 'Sonett'
elif gattung == 'Rollengedicht':
meta_all.at[i, 'gattung_color'] = 'Rollengedicht'
elif ' + ' in str(gattung):
meta_all.at[i, 'gattung_color'] = '[mehrere annotierte Gattungen]'
else:
meta_all.at[i, 'gattung_color'] = '[keine annotierte Gattung]'
meta_all['gattung_color_order'] = meta_all['gattung_color'].replace({
'[keine annotierte Gattung]' : 0,
'[mehrere annotierte Gattungen]' : 1,
'Ballade' : 2,
'Rollengedicht' : 3,
'Denkmal-/Ruinenpoesie' : 4,
'Lied' : 5,
'Sonett' : 6
})
/var/folders/45/zsyytpq97xq280z_cvw88j240000gn/T/ipykernel_6439/1334992829.py:17: FutureWarning:
Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
In [54]:
# for i, (sprechinstanz_markiert, vergangenheitsdominant) in enumerate(zip(meta_all['sprechinstanz_markiert'], meta_all['vergangenheitsdominant'])):
# if sprechinstanz_markiert == 1 and vergangenheitsdominant == 1:
# meta_all.at[i, 'vergangenheitsdominant'] = 'Sprechinstanz markiert +<br>vergangenheitsdominant'
# elif sprechinstanz_markiert == 1 and vergangenheitsdominant != 1:
# meta_all.at[i, 'vergangenheitsdominant'] = 'Sprechinstanz markiert +<br>nicht vergangenheitsdomainant'
# elif sprechinstanz_markiert == 0 and vergangenheitsdominant == 1:
# meta_all.at[i, 'sprechinstanz_vergangenheitsdominant'] = 'Sprechinstanz nicht markiert +<br>vergangenheitsdominant'
# else:
# meta_all.at[i, 'sprechinstanz_vergangenheitsdominant'] = 'Sprechinstanz nicht markiert +<br>nicht vergangenheitsdomainant'
In [55]:
for i, vergangenheitsdominant in enumerate(meta_all['vergangenheitsdominant']):
if vergangenheitsdominant == 1:
meta_all.at[i, 'vergangenheitsdominant'] = 'vergangenheitsdominant'
else:
meta_all.at[i, 'vergangenheitsdominant'] = 'nicht vergangenheitsdominant'
/var/folders/45/zsyytpq97xq280z_cvw88j240000gn/T/ipykernel_6439/2378240172.py:3: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'vergangenheitsdominant' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
In [56]:
rating_table = get_rating_table(meta_all, mode = 'themes')
rating_table['author_title'] = rating_table['author'] + ' – ' + rating_table['title']
for i, author_title in enumerate(meta_all['author_title']):
this_ratings = rating_table.query("author_title == @author_title")
if 'Militär/Krieg' in this_ratings['type'].tolist():
if this_ratings.query("type=='Militär/Krieg'")['rating'].tolist()[0] == '1':
meta_all.at[i, 'Militär/Krieg'] = 'behandelt und positiv bewertet'
else:
meta_all.at[i, 'Militär/Krieg'] = 'behandelt und negativ bewertet'
else:
meta_all.at[i, 'Militär/Krieg'] = 'nicht behandelt'
In [57]:
for i, element in enumerate(meta_all.iloc):
if element.korpus_anth and element.korpus_modcanon == False and element.korpus_muench == False:
meta_all.at[i, 'korpus_color'] = 'Anthologiekorpus'
elif element.korpus_anth == False and element.korpus_modcanon and element.korpus_muench == False:
meta_all.at[i, 'korpus_color'] = 'Kanonisierte Moderne'
elif element.korpus_anth == False and element.korpus_modcanon == False and element.korpus_muench:
meta_all.at[i, 'korpus_color'] = 'Münchhausen-Kreis'
else:
meta_all.at[i, 'korpus_color'] = '[Mehrere Korpora]'
meta_all['korpus_color_order'] = meta_all['korpus_color'].replace({
'Anthologiekorpus' : 0,
'Kanonisierte Moderne' : 1,
'Münchhausen-Kreis' : 2,
'[Mehrere Korpora]' : 3
})
/var/folders/45/zsyytpq97xq280z_cvw88j240000gn/T/ipykernel_6439/2452535144.py:11: FutureWarning:
Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
In [58]:
for i, element in enumerate(meta_all.iloc):
if element.korpus_anth and element.korpus_modcanon == False and element.korpus_muench == False:
meta_all.at[i, 'korpus_color'] = 'Anthologiekorpus'
elif element.korpus_anth == False and element.korpus_modcanon and element.korpus_muench == False:
meta_all.at[i, 'korpus_color'] = 'Kanonisierte Moderne'
elif element.korpus_anth == False and element.korpus_modcanon == False and element.korpus_muench:
meta_all.at[i, 'korpus_color'] = 'Münchhausen-Kreis'
else:
meta_all.at[i, 'korpus_color'] = '[Mehrere Korpora]'
meta_all['korpus_color_order'] = meta_all['korpus_color'].replace({
'Anthologiekorpus' : 0,
'Kanonisierte Moderne' : 1,
'Münchhausen-Kreis' : 2,
'[Mehrere Korpora]' : 3
})
/var/folders/45/zsyytpq97xq280z_cvw88j240000gn/T/ipykernel_6439/2452535144.py:11: FutureWarning:
Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
In [59]:
meta_all['anthology_decade'] = [(x//10)*10 if pd.isna(x) == False else float('NaN') for x in meta_all['anthology_year_used_ed']]
UMAP¶
In [60]:
this_dm = dm_alldistances
In [61]:
import umap.umap_ as umap
n_components = 2
model = umap.UMAP(
n_components = n_components,
metric = 'precomputed',
random_state=0,
)
In [62]:
column_names = ['umap_dim_' + str(i+1) for i in range(n_components)]
In [63]:
meta_all = meta_all.copy()
meta_all[column_names] = model.fit_transform(this_dm.loc[meta_all['id'], meta_all['id']])
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/umap/umap_.py:1865: UserWarning: using precomputed metric; inverse_transform will be unavailable /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/umap/umap_.py:1952: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
In [64]:
meta_plot = meta_all.copy()
fig = px.scatter(
meta_plot,
x = 'umap_dim_1',
y = 'umap_dim_2',
hover_data = ['author', 'title', 'gattung'],
labels = {'umap_dim_1' : '', 'umap_dim_2' : ''}
)
fig.update_traces(marker={'size': 6})
fig.update_layout(
width = 1000, height = 500,
legend=dict(font=dict(size=16)),
xaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
yaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
)
fig.update_layout(legend= {'itemsizing': 'constant'})
fig.write_image(f"plots/6.2 Zweidimensionale Projektion.pdf")
fig.show()
In [65]:
fig = px.scatter(
meta_plot.sort_values(by = 'decade', ascending = True),
x = 'umap_dim_1',
y = 'umap_dim_2',
color = 'year',
# color_discrete_sequence=['Black', 'yellow', 'lightgreen', 'red'],
hover_data = ['author', 'title', 'gattung'],
labels = {'umap_dim_1' : '', 'umap_dim_2' : '', 'year' : ''}
)
fig.update_layout(
width = 1000, height = 500,
xaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
yaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
)
fig.update_traces(marker={'size': 6})
fig.write_image(f"plots/6.2 Zweidimensionale Projektion (Jahre).pdf")
fig.show()
In [66]:
meta_plot = meta_all.copy()
fig = px.scatter(
meta_plot.sort_values(by = 'korpus_color_order'),
x = 'umap_dim_1',
y = 'umap_dim_2',
color = 'korpus_color',
color_discrete_sequence=['Black', 'yellow', 'lightgreen', 'grey'],
hover_data = ['author', 'title', 'gattung'],
labels = {'umap_dim_1' : '', 'umap_dim_2' : '', 'korpus_color' : 'Korpus'}
)
fig.update_layout(
width = 1000, height = 500,
legend=dict(font=dict(size=16)),
xaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
yaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
)
fig.update_traces(marker={'size': 6})
fig.update_layout(legend= {'itemsizing': 'constant'})
fig.write_image(f"plots/6.2 Zweidimensionale Projektion (Korpora).pdf")
fig.show()
In [67]:
meta_plot = meta_all.copy()
fig = px.scatter(
meta_plot.query("anthology_decade < 1950"),
x = 'umap_dim_1',
y = 'umap_dim_2',
color = 'anthology_year_used_ed',
hover_data = ['author', 'title', 'gattung', 'anthology'],
labels = {'umap_dim_1' : '', 'umap_dim_2' : '', 'anthology_year_used_ed' : 'Erscheinungsdatum<br>Anthologie'}
)
fig.update_layout(
# width = 1000, height = 600,
legend=dict(font=dict(size=16)),
xaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
yaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
)
fig.update_traces(marker={'size': 6})
fig.update_layout(legend= {'itemsizing': 'constant'})
fig.show()
In [68]:
meta_plot = meta_all.copy()
fig = px.scatter(
meta_plot.sort_values(by = 'gattung_color_order'),
x = 'umap_dim_1',
y = 'umap_dim_2',
color = 'gattung_color',
color_discrete_sequence = ['Black', '#6e7f80'] + px.colors.qualitative.Plotly[:5],
hover_data = ['author', 'title', 'gattung'],
labels = {'umap_dim_1' : '', 'umap_dim_2' : '', 'gattung_color' : 'Gattung'}
)
fig.update_traces(marker={'size': 6})
fig.update_layout(
# width = 1000, height = 600,
legend=dict(font=dict(size=16)),
xaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
yaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
)
fig.update_layout(legend= {'itemsizing': 'constant'})
fig.show()
In [69]:
meta_plot = meta_all.copy()
fig = px.scatter(
meta_plot.sort_values(by='vergangenheitsdominant'),
x = 'umap_dim_1',
y = 'umap_dim_2',
color = 'vergangenheitsdominant',
hover_data = ['author', 'title', 'gattung'],
labels = {'umap_dim_1' : '', 'umap_dim_2' : ''}
)
fig.update_traces(marker={'size': 6})
fig.update_layout(
# width = 1000, height = 600,
legend=dict(font=dict(size=16)),
xaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
yaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
)
fig.update_layout(legend= {'itemsizing': 'constant'})
fig.show()
In [70]:
meta_plot = meta_all.copy()
fig = px.scatter(
meta_plot.sort_values(by = 'Militär/Krieg', ascending=False),
x = 'umap_dim_1',
y = 'umap_dim_2',
color = 'Militär/Krieg',
hover_data = ['author', 'title', 'gattung'],
labels = {'umap_dim_1' : '', 'umap_dim_2' : '', 'Militär/Krieg': 'Stoffgebiet Militär/Krieg'}
)
fig.update_traces(marker={'size': 6})
fig.update_layout(
# width = 1000, height = 600,
legend=dict(font=dict(size=16)),
xaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
yaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
)
fig.update_layout(legend= {'itemsizing': 'constant'})
fig.show()
In [71]:
# meta_plot = meta_all.copy()
#
# fig = px.scatter(
# meta_plot,
# x = 'umap_dim_1',
# y = 'umap_dim_2',
# color = 'sprechinstanz_zeitdominanz',
# hover_data = ['author', 'title', 'gattung'],
# labels = {'umap_dim_1' : '', 'umap_dim_2' : '',
# 'sprechinstanz_zeitdominanz' : 'Sprechinstanz und Zeitdominanz'}
# )
#
# fig.update_traces(marker={'size': 6})
# fig.update_layout(
# # width = 1000, height = 600,
# legend=dict(font=dict(size=16)),
# xaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
# yaxis=dict(tickfont=dict(size=20), titlefont=dict(size=20)),
# )
# fig.update_layout(legend= {'itemsizing': 'constant'})
#
# fig.show()
Balladen unter nicht vergangenheitsdominanten Texten¶
In [72]:
meta_all['militaer_positiv'] = [1 if x == 'behandelt und positiv bewertet' else 0 for x in meta_all['Militär/Krieg']]
meta_all['period'] = ['1850–1884' if 1850 <= x <= 1884 else '1885–1918' for x in meta_all['year']]
In [73]:
meta_test = meta_all.query("vergangenheitsdominant!='vergangenheitsdominant' and corpus=='anth'").copy()
In [74]:
results = pd.crosstab(meta_test['period'], meta_test['militaer_positiv'], margins=True)
results['0_rel'] = results[0]/results['All']
results['1_rel'] = results[1]/results['All']
results
Out[74]:
| militaer_positiv | 0 | 1 | All | 0_rel | 1_rel |
|---|---|---|---|---|---|
| period | |||||
| 1850–1884 | 144 | 79 | 223 | 0.645740 | 0.354260 |
| 1885–1918 | 82 | 22 | 104 | 0.788462 | 0.211538 |
| All | 226 | 101 | 327 | 0.691131 | 0.308869 |
In [75]:
chi2_contingency(pd.crosstab(meta_test['period'], meta_test['militaer_positiv']), correction=False)
Out[75]:
Chi2ContingencyResult(statistic=6.767616255943135, pvalue=0.009282673896133627, dof=1, expected_freq=array([[154.12232416, 68.87767584],
[ 71.87767584, 32.12232416]]))
In [76]:
get_phi(np.array(pd.crosstab(meta_test['period'], meta_test['militaer_positiv'])))
Out[76]:
0.14386130187509125
Vergleich: ohne Nicht-Vergangenheitsdominanz¶
In [77]:
meta_all.query("corpus=='anth'").groupby('period')['militaer_positiv'].mean()
Out[77]:
period 1850–1884 0.229569 1885–1918 0.208333 Name: militaer_positiv, dtype: float64
In [78]:
chi2_contingency(pd.crosstab(
meta_all.query("corpus=='anth'")['period'],
meta_all.query("corpus=='anth'")['militaer_positiv']),
correction=False)[1]
Out[78]:
0.32921562346581856
In [79]:
get_phi(np.array(pd.crosstab(
meta_all.query("corpus=='anth'")['period'],
meta_all.query("corpus=='anth'")['militaer_positiv'],
)))
Out[79]:
0.022684448317960584